{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_kddcup_path = './data/kddcup_data_corrected.csv'\n",
    "kddcup_data = pd.read_csv(file_kddcup_path)\n",
    "data_columns =  [\"duration\",\"protocol_type\",\"service\",\"flag\",\"src_bytes\",\n",
    "            \"dst_bytes\",\"land_f\",\"wrong_fragment\",\"urgent\",\"hot\",\"num_failed_logins\",\n",
    "            \"logged_in\",\"num_compromised\",\"root_shell\",\"su_attempted\",\"num_root\",\n",
    "            \"num_file_creations\",\"num_shells\",\"num_access_files\",\"num_outbound_cmds\",\n",
    "            \"is_host_login\",\"is_guest_login\",\"count\",\"srv_count\",\"serror_rate\",\n",
    "            \"srv_serror_rate\",\"rerror_rate\",\"srv_rerror_rate\",\"same_srv_rate\",\n",
    "            \"diff_srv_rate\",\"srv_diff_host_rate\",\"dst_host_count\",\"dst_host_srv_count\",\n",
    "            \"dst_host_same_srv_rate\",\"dst_host_diff_srv_rate\",\"dst_host_same_src_port_rate\",\n",
    "            \"dst_host_srv_diff_host_rate\",\"dst_host_serror_rate\",\"dst_host_srv_serror_rate\",\n",
    "            \"dst_host_rerror_rate\",\"dst_host_srv_rerror_rate\",\"error_labels\",\"labels\"]\n",
    "add_data_columns = [\"duration\",\"protocol_type\",\"service\",\"flag\",\"src_bytes\",\n",
    "            \"dst_bytes\",\"land_f\",\"wrong_fragment\",\"urgent\",\"hot\",\"num_failed_logins\",\n",
    "            \"logged_in\",\"num_compromised\",\"root_shell\",\"su_attempted\",\"num_root\",\n",
    "            \"num_file_creations\",\"num_shells\",\"num_access_files\",\"num_outbound_cmds\",\n",
    "            \"is_host_login\",\"is_guest_login\",\"count\",\"srv_count\",\"serror_rate\",\n",
    "            \"srv_serror_rate\",\"rerror_rate\",\"srv_rerror_rate\",\"same_srv_rate\",\n",
    "            \"diff_srv_rate\",\"srv_diff_host_rate\",\"dst_host_count\",\"dst_host_srv_count\",\n",
    "            \"dst_host_same_srv_rate\",\"dst_host_diff_srv_rate\",\"dst_host_same_src_port_rate\",\n",
    "            \"dst_host_srv_diff_host_rate\",\"dst_host_serror_rate\",\"dst_host_srv_serror_rate\",\n",
    "            \"dst_host_rerror_rate\",\"dst_host_srv_rerror_rate\",\"labels\"]\n",
    "kddcup_data.columns = data_columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>protocol_type</th>\n",
       "      <th>service</th>\n",
       "      <th>flag</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land_f</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>...</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "      <th>error_labels</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>238</td>\n",
       "      <td>1282</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 43 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration protocol_type service flag  src_bytes  dst_bytes  land_f  \\\n",
       "0         0           tcp    http   SF        162       4528       0   \n",
       "1         0           tcp    http   SF        236       1228       0   \n",
       "2         0           tcp    http   SF        233       2032       0   \n",
       "3         0           tcp    http   SF        239        486       0   \n",
       "4         0           tcp    http   SF        238       1282       0   \n",
       "\n",
       "   wrong_fragment  urgent  hot  ...  dst_host_same_srv_rate  \\\n",
       "0               0       0    0  ...                     1.0   \n",
       "1               0       0    0  ...                     1.0   \n",
       "2               0       0    0  ...                     1.0   \n",
       "3               0       0    0  ...                     1.0   \n",
       "4               0       0    0  ...                     1.0   \n",
       "\n",
       "   dst_host_diff_srv_rate  dst_host_same_src_port_rate  \\\n",
       "0                     0.0                         1.00   \n",
       "1                     0.0                         0.50   \n",
       "2                     0.0                         0.33   \n",
       "3                     0.0                         0.25   \n",
       "4                     0.0                         0.20   \n",
       "\n",
       "   dst_host_srv_diff_host_rate  dst_host_serror_rate  \\\n",
       "0                          0.0                   0.0   \n",
       "1                          0.0                   0.0   \n",
       "2                          0.0                   0.0   \n",
       "3                          0.0                   0.0   \n",
       "4                          0.0                   0.0   \n",
       "\n",
       "   dst_host_srv_serror_rate  dst_host_rerror_rate  dst_host_srv_rerror_rate  \\\n",
       "0                       0.0                   0.0                       0.0   \n",
       "1                       0.0                   0.0                       0.0   \n",
       "2                       0.0                   0.0                       0.0   \n",
       "3                       0.0                   0.0                       0.0   \n",
       "4                       0.0                   0.0                       0.0   \n",
       "\n",
       "   error_labels  labels  \n",
       "0       normal.  normal  \n",
       "1       normal.  normal  \n",
       "2       normal.  normal  \n",
       "3       normal.  normal  \n",
       "4       normal.  normal  \n",
       "\n",
       "[5 rows x 43 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kddcup_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "kddcup_data_labels = kddcup_data['labels']\n",
    "kddcup_data = kddcup_data.drop('error_labels', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the shape of KDD99 data is (1048575, 42)\n"
     ]
    }
   ],
   "source": [
    "kddcup_data.shape\n",
    "print('the shape of KDD99 data is {0}'.format(kddcup_data.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>protocol_type</th>\n",
       "      <th>service</th>\n",
       "      <th>flag</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land_f</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>...</th>\n",
       "      <th>dst_host_srv_count</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>238</td>\n",
       "      <td>1282</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration protocol_type service flag  src_bytes  dst_bytes  land_f  \\\n",
       "0         0           tcp    http   SF        162       4528       0   \n",
       "1         0           tcp    http   SF        236       1228       0   \n",
       "2         0           tcp    http   SF        233       2032       0   \n",
       "3         0           tcp    http   SF        239        486       0   \n",
       "4         0           tcp    http   SF        238       1282       0   \n",
       "\n",
       "   wrong_fragment  urgent  hot  ...  dst_host_srv_count  \\\n",
       "0               0       0    0  ...                   1   \n",
       "1               0       0    0  ...                   2   \n",
       "2               0       0    0  ...                   3   \n",
       "3               0       0    0  ...                   4   \n",
       "4               0       0    0  ...                   5   \n",
       "\n",
       "   dst_host_same_srv_rate  dst_host_diff_srv_rate  \\\n",
       "0                     1.0                     0.0   \n",
       "1                     1.0                     0.0   \n",
       "2                     1.0                     0.0   \n",
       "3                     1.0                     0.0   \n",
       "4                     1.0                     0.0   \n",
       "\n",
       "   dst_host_same_src_port_rate  dst_host_srv_diff_host_rate  \\\n",
       "0                         1.00                          0.0   \n",
       "1                         0.50                          0.0   \n",
       "2                         0.33                          0.0   \n",
       "3                         0.25                          0.0   \n",
       "4                         0.20                          0.0   \n",
       "\n",
       "   dst_host_serror_rate  dst_host_srv_serror_rate  dst_host_rerror_rate  \\\n",
       "0                   0.0                       0.0                   0.0   \n",
       "1                   0.0                       0.0                   0.0   \n",
       "2                   0.0                       0.0                   0.0   \n",
       "3                   0.0                       0.0                   0.0   \n",
       "4                   0.0                       0.0                   0.0   \n",
       "\n",
       "   dst_host_srv_rerror_rate  labels  \n",
       "0                       0.0  normal  \n",
       "1                       0.0  normal  \n",
       "2                       0.0  normal  \n",
       "3                       0.0  normal  \n",
       "4                       0.0  normal  \n",
       "\n",
       "[5 rows x 42 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kddcup_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0 'icmp' 'red_i' ... 0.11 0 'normal']\n",
      " [0 'icmp' 'urh_i' ... 0.0 0 'normal']\n",
      " [0 'tcp' 'http_8001' ... 0.87 0 'satan']\n",
      " ...\n",
      " [0 'icmp' 'urh_i' ... 0.0 0 'normal']\n",
      " [0 'tcp' 'harvest' ... 0.85 1 'satan']\n",
      " [0 'icmp' 'tim_i' ... 0.0 0 'pod']]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>32</th>\n",
       "      <th>33</th>\n",
       "      <th>34</th>\n",
       "      <th>35</th>\n",
       "      <th>36</th>\n",
       "      <th>37</th>\n",
       "      <th>38</th>\n",
       "      <th>39</th>\n",
       "      <th>40</th>\n",
       "      <th>41</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>238</td>\n",
       "      <td>1282</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  0    1     2   3    4     5  6  7  8  9   ... 32 33 34    35 36 37 38 39 40  \\\n",
       "0  0  tcp  http  SF  162  4528  0  0  0  0  ...  1  1  0     1  0  0  0  0  0   \n",
       "1  0  tcp  http  SF  236  1228  0  0  0  0  ...  2  1  0   0.5  0  0  0  0  0   \n",
       "2  0  tcp  http  SF  233  2032  0  0  0  0  ...  3  1  0  0.33  0  0  0  0  0   \n",
       "3  0  tcp  http  SF  239   486  0  0  0  0  ...  4  1  0  0.25  0  0  0  0  0   \n",
       "4  0  tcp  http  SF  238  1282  0  0  0  0  ...  5  1  0   0.2  0  0  0  0  0   \n",
       "\n",
       "       41  \n",
       "0  normal  \n",
       "1  normal  \n",
       "2  normal  \n",
       "3  normal  \n",
       "4  normal  \n",
       "\n",
       "[5 rows x 42 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_add_kddcup_data = pd.read_csv('./data/add_to_kdd_data.csv')\n",
    "np_add_kddcup_data = np.array(df_add_kddcup_data)\n",
    "np_add_kddcup_data = np.delete(np_add_kddcup_data, [0], axis=1)\n",
    "print(np_add_kddcup_data)\n",
    "np_kddcup_data = np.array(kddcup_data)\n",
    "np_kddcup_data = np.concatenate((np_kddcup_data, np_add_kddcup_data))\n",
    "df_kddcup_data = pd.DataFrame(np_kddcup_data)\n",
    "df_kddcup_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>protocol_type</th>\n",
       "      <th>service</th>\n",
       "      <th>flag</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land_f</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>...</th>\n",
       "      <th>dst_host_srv_count</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "      <th>labels</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>238</td>\n",
       "      <td>1282</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  duration protocol_type service flag src_bytes dst_bytes land_f  \\\n",
       "0        0           tcp    http   SF       162      4528      0   \n",
       "1        0           tcp    http   SF       236      1228      0   \n",
       "2        0           tcp    http   SF       233      2032      0   \n",
       "3        0           tcp    http   SF       239       486      0   \n",
       "4        0           tcp    http   SF       238      1282      0   \n",
       "\n",
       "  wrong_fragment urgent hot  ... dst_host_srv_count dst_host_same_srv_rate  \\\n",
       "0              0      0   0  ...                  1                      1   \n",
       "1              0      0   0  ...                  2                      1   \n",
       "2              0      0   0  ...                  3                      1   \n",
       "3              0      0   0  ...                  4                      1   \n",
       "4              0      0   0  ...                  5                      1   \n",
       "\n",
       "  dst_host_diff_srv_rate dst_host_same_src_port_rate  \\\n",
       "0                      0                           1   \n",
       "1                      0                         0.5   \n",
       "2                      0                        0.33   \n",
       "3                      0                        0.25   \n",
       "4                      0                         0.2   \n",
       "\n",
       "  dst_host_srv_diff_host_rate dst_host_serror_rate dst_host_srv_serror_rate  \\\n",
       "0                           0                    0                        0   \n",
       "1                           0                    0                        0   \n",
       "2                           0                    0                        0   \n",
       "3                           0                    0                        0   \n",
       "4                           0                    0                        0   \n",
       "\n",
       "  dst_host_rerror_rate dst_host_srv_rerror_rate  labels  \n",
       "0                    0                        0  normal  \n",
       "1                    0                        0  normal  \n",
       "2                    0                        0  normal  \n",
       "3                    0                        0  normal  \n",
       "4                    0                        0  normal  \n",
       "\n",
       "[5 rows x 42 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kddcup_data.columns=add_data_columns\n",
    "df_kddcup_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>protocol_type</th>\n",
       "      <th>service</th>\n",
       "      <th>flag</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land_f</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>...</th>\n",
       "      <th>dst_host_count</th>\n",
       "      <th>dst_host_srv_count</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>238</td>\n",
       "      <td>1282</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 41 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  duration protocol_type service flag src_bytes dst_bytes land_f  \\\n",
       "0        0           tcp    http   SF       162      4528      0   \n",
       "1        0           tcp    http   SF       236      1228      0   \n",
       "2        0           tcp    http   SF       233      2032      0   \n",
       "3        0           tcp    http   SF       239       486      0   \n",
       "4        0           tcp    http   SF       238      1282      0   \n",
       "\n",
       "  wrong_fragment urgent hot  ... dst_host_count dst_host_srv_count  \\\n",
       "0              0      0   0  ...              1                  1   \n",
       "1              0      0   0  ...              2                  2   \n",
       "2              0      0   0  ...              3                  3   \n",
       "3              0      0   0  ...              4                  4   \n",
       "4              0      0   0  ...              5                  5   \n",
       "\n",
       "  dst_host_same_srv_rate dst_host_diff_srv_rate dst_host_same_src_port_rate  \\\n",
       "0                      1                      0                           1   \n",
       "1                      1                      0                         0.5   \n",
       "2                      1                      0                        0.33   \n",
       "3                      1                      0                        0.25   \n",
       "4                      1                      0                         0.2   \n",
       "\n",
       "  dst_host_srv_diff_host_rate dst_host_serror_rate dst_host_srv_serror_rate  \\\n",
       "0                           0                    0                        0   \n",
       "1                           0                    0                        0   \n",
       "2                           0                    0                        0   \n",
       "3                           0                    0                        0   \n",
       "4                           0                    0                        0   \n",
       "\n",
       "  dst_host_rerror_rate dst_host_srv_rerror_rate  \n",
       "0                    0                        0  \n",
       "1                    0                        0  \n",
       "2                    0                        0  \n",
       "3                    0                        0  \n",
       "4                    0                        0  \n",
       "\n",
       "[5 rows x 41 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kddcup_data.to"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "70"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_kdd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
